For this Seurat analysis, we used the following parameters:

pars <- as.data.frame(do.call("rbind", params))
colnames(pars) <- "parameters_used"
pars

Starting with this Seurat object from Mireille.

path <- "/fast/work/groups/cubi/milekm/mireille/de/fracture-healing-and-aging-scseq/DE_analysis/miha"
sobj <- readRDS(file.path(path,params$path_to_object))
selected_cell_types <- sobj[[]] %>% 
  pull(cluster_id) %>% 
  unique()

sample <- sobj[[]] %>%
  select(group,mouse) %>%
  mutate(sample=paste0(group,"_",mouse)) %>%
  select(sample)

sobj <- AddMetaData(sobj, metadata = sample)

# cluster_metadata_3 <- sobj[[]] %>%
#     mutate(age = str_extract(group, "^[0-9]+w")) %>%
#     mutate(strain = str_extract(group, "w(E|SPF|)")) %>% 
#     mutate(time = str_extract(group, "[0-9]+days")) %>%
#     mutate(arm = paste(age, strain, time, sep = "_")) %>%
#     group_by(arm) %>%
#     mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse)))) %>% 
#   select(sample, cell_type, group, mouse, age, location, time, strain,pseudoID) %>% 
#   distinct()

expr <- list()

for (cell_type in selected_cell_types) {
 for (sample in unique(sobj[[]]$sample)) {
   # sample <- unique(sobj[[]]$sample)[1]
   # cell_type <- selected_cell_types[1]
   cells <- Cells(sobj)[(sobj@meta.data$sample==sample & sobj$cluster_id==cell_type )  ] 
   cells <- cells[!is.na(cells)]
   if (length(cells) > 1) {
     expr[[paste0(cell_type,';',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
   }
 }
}

for (sample in unique(sobj[[]]$sample)) {
 cells <- Cells(sobj)[(sobj@meta.data$sample==sample )]
 cells <- cells[!is.na(cells)]
 expr[[paste0('all;',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
}


sample_name <- sub(".*-","",sub(";.*","", sub(';','-',names(expr))))

colData<-data.frame(sample=names(expr),
               cell_type=factor(sub(';.*','',names(expr))),
               sample_name=sample_name,
               group=sub("_11.*","",sub(".*;","",names(expr))),
               mouse=sub('.*s_11','11',names(expr)) ,
               age=sub("w.*","w", sample_name),
               location=sub("_.*", "", sub(".*[wEF]","", sample_name )),
               time = sub(".*_", "", sub("days.*", "days", sample_name)),
               strain = ifelse(grepl("12w",sample_name), "young",
                               ifelse(grepl("SPF", sample_name), "aged",
                                      ifelse(grepl("wE", sample_name),"immuno", NA))))

colData <- colData %>%
  mutate(age_time_strain = paste0(age, "_", time, "_", strain)) %>%
  group_by(age_time_strain) %>%
  mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse))))

counts <- do.call(cbind, expr)

Get number of reads per sample.

lengths <- colSums(counts)
df_lengths <- data.frame(sample = names(lengths),
                         n_reads = lengths) %>%
  mutate(group = sub("_11.*","",sub(".*;", "",sample)),
         celltype = sub(";.*", "", sample))  

ggplot(df_lengths, aes(celltype,n_reads,fill=group))+
  geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
  geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2) +
  facet_wrap(~celltype, scales="free")

We run DESeq2 by ~group, separating each contrast and celltype. This does not account for total variance of the experiment, so results are somewhat less reliable.

selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")

# selected_cell_types <- c("B Cells", "Dendritic Cells")
# selected_cell_types <- c("all")
selected_contrasts <- c("12wfx_5days_vs_12wfx_2days",
                        "26wEfx_5days_vs_26wEfx_2days",
                        "26wSPFfx_5days_vs_26wSPFfx_2days",
                        "26wEfx_2days_vs_12wfx_2days",
                        "26wSPFfx_2days_vs_12wfx_2days",
                        "26wSPFfx_2days_vs_26wEfx_2days",
                        "12wdist_5days_vs_12wfx_5days",
                        "12wprox_5days_vs_12wfx_5days",
                        "12wdist_5days_vs_12wprox_5days",
                        "26wEdist_5days_vs_26wEfx_5days",
                        "26wEprox_5days_vs_26wEfx_5days",
                        "26wEdist_5days_vs_26wEprox_5days",
                        "26wSPFdist_5days_vs_26wSPFfx_5days",
                        "26wSPFprox_5days_vs_26wSPFfx_5days",
                        "26wSPFdist_5days_vs_26wSPFprox_5days")
# selected_contrasts <- c("12wfx_5days_vs_12wfx_2days",
                        # "26wEfx_5days_vs_26wEfx_2days")
# all_groups <- colData$group %>%
#   unique()
# contrasts <- expand_grid(perturbation=all_groups, reference=all_groups) %>%
#       filter(perturbation < reference) %>%
#       mutate(contrast = paste0(perturbation,"_vs_",reference)) %>%
#       pull(contrast)

res <- list()

contrasts <- selected_contrasts
for (comparison in contrasts){
  perturbation <- sub("_vs_.*", "", comparison)
  reference <- sub(".*_vs_", "", comparison)
  sub_res <- list()
  for (celltype in selected_cell_types){
    coldata_df <- colData %>%
      dplyr::filter(cell_type %in% celltype, group %in% c(perturbation, reference))
    counts_df <- subset(counts, select = coldata_df$sample)
    
    if (sum(colSums(counts_df)) != 0){
      dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
                                    colData=coldata_df,
                                    design=~group)
    }
    dds$group <- relevel(dds$group, ref = reference)
    
    if (celltype != "all" & celltype != "all_cells"){
      dds <- estimateSizeFactors(dds, type='poscounts')
      dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
    } else {
      dds <- estimateSizeFactors(dds)
      dds <- DESeq(dds)
    }
    sub_res[[celltype]] <- lfcShrink(dds, coef=2, type=params$shrinkage_type, format="DataFrame") %>%
      as.data.frame() %>%
      tibble::rownames_to_column('gene_name') %>%
      mutate(cell_type=celltype, contrast = comparison)
  }
  res[[comparison]] <- do.call("rbind", sub_res) %>%
  tibble::remove_rownames()
}


res_df <- do.call("rbind", res) %>%
  tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
  p <- res_df %>%                  
    dplyr::filter(!is.na(padj), contrast == comparison) %>%
    ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) + 
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison,padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2, scales = "free")
  print(p)
  
}

# for (comparison in unique(res_df$contrast)){
#   p <- res_df %>%
#     dplyr::filter(contrast == comparison) %>%
#     ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
#     geom_point(size=.5, shape=1) +
#     geom_label_repel(data=res_df %>%
#                        dplyr::filter(contrast == comparison, padj < .05),
#                      aes(label=gene_name),
#                      segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
#                      label.padding=0.05,label.size=NA) +
#     scale_color_manual(values=c('black','red')) +
#     scale_x_continuous(trans='log10') +
#     coord_cartesian(ylim =c(-4,4)) +
#     theme_classic() +
#     theme(legend.position='none',
#           strip.background=element_blank())+
#     ggtitle(comparison)+
#     facet_wrap(~cell_type, ncol = 2)
#   print(p)
# }
res_df %>%
  dplyr::arrange(padj) %>%
  dplyr::filter(!is.na(padj) & (padj < .05)) %>%
  dplyr::mutate_if(is.numeric, signif, 2) %>%
  DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
dir.create("results_list_211223", recursive=F, showWarnings = F)
write.table(res_df,"de_results_separate_analysis.tsv", quote=F, sep="\t",row.names = F)
library(tmod)
# data(tmod)
# 
# library(orthomapper)
# mousehum <- orthologs(10090, 9606)
# mousehum$h_symbol <- entrez_annotate(mousehum$T.9606)$SYMBOL
# mousehum$m_symbol <- entrez_annotate(mousehum$T.10090)$SYMBOL
# 
# mousehum <- mousehum[order(mousehum$h_symbol),]
# rows_tmod <- which(mousehum$h_symbol %in% tmod$gv)
# mgenes <- mousehum$m_symbol[rows_tmod]
# 
# list_tmod <- sapply(tmod$gs2gv, function(x) tmod$gv[x] )
# list_tmod <- sapply(list_tmod, function(x) x[order(x)])
# list_tmod <- sapply(list_tmod, function(x) x[which(x %in%  mousehum$h_symbol)])
# list_tmod <- sapply(list_tmod, function(x) mousehum[which(mousehum$h_symbol %in% x), "m_symbol"])
# list_tmod <- sapply(list_tmod, function(x) which(mgenes %in% x))
# 
# tmod_mm <- tmod
# 
# tmod_mm$gv <- mgenes
# tmod_mm$gs2gv <- list_tmod

# save tmod mouse object
# saveRDS(tmod_mm, "tmod_mm.rds")
tmod_mm <- readRDS("tmod_mm.rds")

df2tmod <- function(df, gene_id_col=ncol(df), module_id_col=1, module_title_col=2) {

  require(tmod, quietly=TRUE)

  gene_ids <- df[[gene_id_col]]
  module_ids <- df[[module_id_col]]
  m2g <- tapply(gene_ids, module_ids, list)
  df[[gene_id_col]] <- NULL
  df <- df[!duplicated(df[[module_id_col]]), ]
  colnames(df)[module_id_col] <- "ID"
  colnames(df)[module_title_col] <- "Title"

  makeTmod(modules=df, modules2genes=m2g)
}

df <- as.data.frame(msigdbr::msigdbr(species='Mus musculus'))
df <- df[ , c("gs_name", "gs_id", "gs_cat", "gs_subcat", "gene_symbol") ]
colnames(df) <- c("Title", "ID", "Category", "Subcategory", "GeneID")
msig <- df2tmod(df[!is.na(df$GeneID),], gene_id_col=ncol(df), module_id_col=2, module_title_col=1)
sel <- (msig$gs$Category %in% c("H"))  | (msig$gs$Subcategory %in% c('CP:REACTOME','GO:BP','CP:KEGG'))
res <- split(res_df, f = res_df$contrast)
contrasts <- names(res)

plot_panels <- function(contrast){
  # contrast <- contrasts[2]
  l <- split(res[[contrast]], f=res[[contrast]]$cell_type)
  genes <- lapply(l, function(x) x %>%
                    filter(!is.na(padj)) %>%
                    arrange(pvalue))
  lgenes <- lapply(genes, function(x) x$gene_name)
  res.tmod <- lapply(lgenes, tmodCERNOtest, mset = msig[sel])
  
  
  sgenes <- sapply(l, function(x) tmodDecideTests(x$gene_name, lfc = x$log2FoldChange,
                                                  pval = x$padj, lfc.thr=0, pval.thr = 0.05, mset = msig[sel]))
  names(sgenes) <- names(l)
  
  
  keep_results <- names(res.tmod)[sapply(res.tmod, function(x) x %>% filter(adj.P.Val<1e-8 , AUC >0.7)%>% nrow() >0)]
  relevant_results <- res.tmod[keep_results]
  
  if(length(relevant_results)>0){
    sgenes <- sgenes[names(relevant_results)]
    p <- ggPanelplot(relevant_results, sgenes = sgenes, auc_thr = 0,
                     q_thr = 1,
                     filter_row_q = 1e-8,
                     filter_row_auc = 0.7,
                     q_cutoff = 1e-12) + 
      ggtitle(contrast)
    print(p)
  }
}

lapply(contrasts, plot_panels)

## [[1]]

## 
## [[2]]
## NULL
## 
## [[3]]

## 
## [[4]]

## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]

## 
## [[8]]

## 
## [[9]]
## NULL
## 
## [[10]]

## 
## [[11]]
## NULL
## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

res <- split(res_df, f = res_df$contrast)
contrasts <- names(res)

plot_panels <- function(contrast){
  # contrast <- contrasts[2]
  l <- split(res[[contrast]], f=res[[contrast]]$cell_type)
  genes <- lapply(l, function(x) x %>%
                    filter(!is.na(padj)) %>%
                    arrange(pvalue))
  lgenes <- lapply(genes, function(x) x$gene_name)
  res.tmod <- lapply(lgenes, tmodCERNOtest, mset = tmod_mm)
  
  
  sgenes <- sapply(l, function(x) tmodDecideTests(x$gene_name, lfc = x$log2FoldChange,
                                                  pval = x$padj, lfc.thr=0, pval.thr = 0.05, mset = tmod_mm))
  names(sgenes) <- names(l)
  
  
  keep_results <- names(res.tmod)[sapply(res.tmod, function(x) x %>% filter(adj.P.Val<1e-8 , AUC >0.7)%>% nrow() >0)]
  relevant_results <- res.tmod[keep_results]
  
  if(length(relevant_results)>0){
    sgenes <- sgenes[names(relevant_results)]
    p <- ggPanelplot(relevant_results, sgenes = sgenes, auc_thr = 0,
                     q_thr = 1,
                     filter_row_q = 1e-8,
                     filter_row_auc = 0.7,
                     q_cutoff = 1e-12) + 
      ggtitle(contrast)
    print(p)
  }
}

lapply(contrasts, plot_panels)

## [[1]]

## 
## [[2]]
## NULL
## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]

## 
## [[13]]

## 
## [[14]]
## NULL
## 
## [[15]]

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Rocky Linux 8.7 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /data/cephfs-1/work/groups/cubi/users/milekm_c/miniconda/envs/R-fixed-biomart/lib/libopenblasp-r0.3.24.so;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] tmod_0.50.13                stringr_1.5.0              
##  [3] ggrepel_0.9.3               DESeq2_1.40.2              
##  [5] SummarizedExperiment_1.30.2 Biobase_2.60.0             
##  [7] MatrixGenerics_1.12.2       matrixStats_1.0.0          
##  [9] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
## [11] IRanges_2.34.1              S4Vectors_0.38.1           
## [13] BiocGenerics_0.46.0         cowplot_1.1.1              
## [15] ggplot2_3.4.3               gtools_3.9.4               
## [17] tidyr_1.3.0                 dplyr_1.1.3                
## [19] SeuratObject_4.1.4          Seurat_4.4.0               
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.15.0       jsonlite_1.8.7         
##   [4] magrittr_2.0.3          spatstat.utils_3.0-3    farver_2.1.1           
##   [7] rmarkdown_2.25          zlibbioc_1.46.0         vctrs_0.6.3            
##  [10] ROCR_1.0-11             spatstat.explore_3.2-3  RCurl_1.98-1.12        
##  [13] S4Arrays_1.0.4          htmltools_0.5.6         sass_0.4.7             
##  [16] sctransform_0.4.0       parallelly_1.36.0       KernSmooth_2.23-22     
##  [19] bslib_0.5.1             htmlwidgets_1.6.2       ica_1.0-3              
##  [22] plyr_1.8.9              plotly_4.10.2           zoo_1.8-12             
##  [25] cachem_1.0.8            igraph_1.5.1            mime_0.12              
##  [28] lifecycle_1.0.3         pkgconfig_2.0.3         Matrix_1.6-1.1         
##  [31] R6_2.5.1                fastmap_1.1.1           GenomeInfoDbData_1.2.10
##  [34] fitdistrplus_1.1-11     future_1.33.0           shiny_1.7.5            
##  [37] digest_0.6.33           colorspace_2.1-0        patchwork_1.1.3        
##  [40] tensor_1.5              irlba_2.3.5.1           crosstalk_1.2.0        
##  [43] labeling_0.4.3          progressr_0.14.0        fansi_1.0.4            
##  [46] spatstat.sparse_3.0-2   httr_1.4.7              polyclip_1.10-6        
##  [49] abind_1.4-5             compiler_4.3.1          withr_2.5.1            
##  [52] BiocParallel_1.34.2     gplots_3.1.3            MASS_7.3-60            
##  [55] DelayedArray_0.26.6     caTools_1.18.2          tools_4.3.1            
##  [58] lmtest_0.9-40           beeswarm_0.4.0          msigdbr_7.5.1          
##  [61] httpuv_1.6.11           future.apply_1.11.0     goftest_1.2-3          
##  [64] glue_1.6.2              nlme_3.1-163            promises_1.2.1         
##  [67] grid_4.3.1              Rtsne_0.16              cluster_2.1.4          
##  [70] reshape2_1.4.4          generics_0.1.3          gtable_0.3.4           
##  [73] spatstat.data_3.0-1     data.table_1.14.8       XVector_0.40.0         
##  [76] sp_2.1-0                utf8_1.2.3              spatstat.geom_3.2-5    
##  [79] RcppAnnoy_0.0.21        RANN_2.6.1              pillar_1.9.0           
##  [82] babelgene_22.9          later_1.3.1             splines_4.3.1          
##  [85] lattice_0.21-9          survival_3.5-7          deldir_1.0-9           
##  [88] tidyselect_1.2.0        locfit_1.5-9.8          miniUI_0.1.1.1         
##  [91] pbapply_1.7-2           knitr_1.44              gridExtra_2.3          
##  [94] scattermore_1.2         xfun_0.40               pheatmap_1.0.12        
##  [97] DT_0.28                 stringi_1.7.12          tagcloud_0.6           
## [100] lazyeval_0.2.2          yaml_2.3.7              evaluate_0.22          
## [103] codetools_0.2-19        tibble_3.2.1            cli_3.6.1              
## [106] uwot_0.1.16             xtable_1.8-4            reticulate_1.32.0      
## [109] munsell_0.5.0           jquerylib_0.1.4         Rcpp_1.0.11            
## [112] globals_0.16.2          spatstat.random_3.1-6   png_0.1-8              
## [115] XML_3.99-0.14           parallel_4.3.1          ellipsis_0.3.2         
## [118] bitops_1.0-7            listenv_0.9.0           plotwidgets_0.5.1      
## [121] viridisLite_0.4.2       scales_1.2.1            ggridges_0.5.4         
## [124] crayon_1.5.2            leiden_0.4.3            purrr_1.0.2            
## [127] rlang_1.1.1